# -*- coding: utf-8 -*-
from pymongo import MongoClient
from scrapy.utils.project import get_project_settings


class CatalogRepare(object):
    """Fill in missing parent-category fields on items from the ``catalog_pool`` collection.

    On construction the ``catalog_pool`` collection is read and flattened into
    three lookup tables, all keyed by the *string* form of the category ``_id``:

    * ``cat1_map``: level-1 id -> ``{'cat1Id', 'cat1Name'}``
    * ``cat2_map``: level-2 id -> level-1 fields plus ``{'cat2Id', 'cat2Name'}``
    * ``cat3_map``: level-3 id -> level-1/2 fields plus ``{'cat3Id', 'cat3Name'}``
    """

    def __init__(self):
        """Connect to MongoDB using the Scrapy project settings and load the catalog maps.

        Reads ``MONGODB_URI`` and ``MONGODB_DATABASE`` from the project settings.
        """
        # Start from empty maps so repare() is safe to call even if loading
        # the pool yields nothing.
        self.cat1_map = dict()
        self.cat2_map = dict()
        self.cat3_map = dict()

        settings = get_project_settings()
        self.mongo_uri = settings.get('MONGODB_URI')
        self.mongo_db = settings.get('MONGODB_DATABASE')
        self.client = MongoClient(self.mongo_uri)
        self.db = self.client[self.mongo_db]

        # get_collection() always returns a collection object, so there is
        # nothing to truth-test here; an empty collection simply yields no rows.
        pool = self.db.get_collection('catalog_pool')
        self._build_plain_cat(pool.find().sort("level"))

    @staticmethod
    def _is_blank(value):
        """Return True when ``value`` is ``None`` or stringifies to ``''``."""
        return value is None or str(value) == ''

    def _build_plain_cat(self, pool):
        """Rebuild ``cat1_map``/``cat2_map``/``cat3_map`` from catalog rows.

        :param pool: iterable of mapping-like rows exposing ``_id``, ``level``,
            ``catalogName`` and (for levels 2 and 3) ``parentId``.

        Rows are grouped by level first, so the result does not depend on the
        order in which ``pool`` yields them. Rows whose parent cannot be
        resolved are skipped instead of aborting the whole load.
        """
        self.cat1_map = dict()
        self.cat2_map = dict()
        self.cat3_map = dict()

        rows_by_level = {1: [], 2: [], 3: []}
        for row in pool:
            bucket = rows_by_level.get(row.get('level'))
            if bucket is not None:
                bucket.append(row)

        for row in rows_by_level[1]:
            cat1_id = str(row.get('_id'))
            self.cat1_map[cat1_id] = {
                'cat1Id': cat1_id,
                'cat1Name': row.get('catalogName'),
            }

        for row in rows_by_level[2]:
            cat1 = self.cat1_map.get(str(row.get('parentId')))
            if cat1 is None:
                # Orphan level-2 row: its parent is not a known level-1 category.
                continue
            cat2_id = str(row.get('_id'))
            # Key by str so lookups via str(parentId) / str(catalog2Id) match.
            self.cat2_map[cat2_id] = {
                'cat1Id': str(cat1.get('cat1Id')),
                'cat1Name': cat1.get('cat1Name'),
                'cat2Id': cat2_id,
                'cat2Name': row.get('catalogName'),
            }

        for row in rows_by_level[3]:
            cat2 = self.cat2_map.get(str(row.get('parentId')))
            if cat2 is None:
                # Orphan level-3 row: its parent is not a known level-2 category.
                continue
            cat3_id = str(row.get('_id'))
            self.cat3_map[cat3_id] = {
                'cat1Id': str(cat2.get('cat1Id')),
                'cat1Name': cat2.get('cat1Name'),
                'cat2Id': str(cat2.get('cat2Id')),
                'cat2Name': cat2.get('cat2Name'),
                'cat3Id': cat3_id,
                'cat3Name': row.get('catalogName'),
            }

    def repare(self, item):
        """Fill missing category ids/names on ``item`` from its deepest known category.

        :param item: product record supporting ``.get(key)`` and item assignment.
        :return: the same ``item`` object, modified in place when a repair applied.

        If ``catalog3Id`` is set and any level-1/level-2 field is missing, the
        level-1, level-2 and level-3 name fields are taken from ``cat3_map``.
        Otherwise (or when that id is unknown), if ``catalog2Id`` is set and a
        level-1 field is missing, the level-1 fields and the level-2 name are
        taken from ``cat2_map``.
        """
        cat3_id = item.get('catalog3Id')
        cat2_id = item.get('catalog2Id')

        # A missing id comes back as None; str(None) would be the truthy
        # string 'None', so blankness is checked explicitly.
        cat1_missing = (self._is_blank(item.get('catalog1Id'))
                        or not item.get('catalog1Name'))
        cat2_missing = self._is_blank(cat2_id) or not item.get('catalog2Name')

        if not self._is_blank(cat3_id) and (cat1_missing or cat2_missing):
            cat3 = self.cat3_map.get(str(cat3_id))
            if cat3:
                # Level-1 category id
                item['catalog1Id'] = str(cat3.get('cat1Id'))
                # Level-1 category name
                item['catalog1Name'] = cat3.get('cat1Name')
                # Level-2 category id
                item['catalog2Id'] = str(cat3.get('cat2Id'))
                # Level-2 category name
                item['catalog2Name'] = cat3.get('cat2Name')
                # Level-3 category name
                item['catalog3Name'] = cat3.get('cat3Name')
                return item

        # Fall back to the level-2 category when level 3 is absent or unknown.
        if not self._is_blank(cat2_id) and cat1_missing:
            cat2 = self.cat2_map.get(str(cat2_id))
            if cat2:
                # Level-1 category id
                item['catalog1Id'] = str(cat2.get('cat1Id'))
                # Level-1 category name
                item['catalog1Name'] = cat2.get('cat1Name')
                # Level-2 category name
                item['catalog2Name'] = cat2.get('cat2Name')

        return item


# Canonical product-page URL template per supplier id; ``{}`` is the SKU id.
SUPPLIER_LINK_TEMPLATES = {
    'JD1001': 'http://item.jd.com/{}.html',
    'SN1002': 'http://product.suning.com/0000000000/{}.html',
    'DL1003': 'https://b2b.nbdeli.com/Goods/ItemDetail_{}_40.htm',
}


def build_supplier_sku_link(supplier_id, supplier_sku_id):
    """Return the supplier's product-page URL for a SKU, or ``None``.

    :param supplier_id: supplier code, e.g. ``'JD1001'``.
    :param supplier_sku_id: the supplier's SKU id; for ``'DL1003'`` every
        ``'PCS'`` substring is removed before it is put into the URL.
    :return: the formatted URL, or ``None`` when the supplier has no template
        or the SKU id is missing.
    """
    template = SUPPLIER_LINK_TEMPLATES.get(supplier_id)
    if template is None or supplier_sku_id is None:
        return None
    sku = str(supplier_sku_id)
    if supplier_id == 'DL1003':
        sku = sku.replace('PCS', '')
    return template.format(sku)


if __name__ == '__main__':
    # One-off maintenance run: rewrite 'e.abchina.com' SKU links in the
    # 'data_20190514' collection to the supplier's own product page.
    tool = CatalogRepare()
    collection = tool.db.get_collection('data_20190514')
    for item in collection.find():
        # NOTE(review): the catalog fields repaired here are not written back;
        # only supplierSkuLink is persisted below - confirm this is intended.
        fixed = tool.repare(item)
        sp_id = fixed.get('supplierId')
        current_link = fixed.get('supplierSkuLink') or ''
        if not sp_id or 'e.abchina.com' not in current_link:
            print('exists...')
            continue

        new_link = build_supplier_sku_link(sp_id, fixed.get('supplierSkuId'))
        if new_link is None:
            # Unknown supplier or missing SKU id: nothing to rewrite, so no
            # write is issued for this document.
            continue

        try:
            collection.update_one(
                {'_id': item['_id']},
                {'$set': {'supplierSkuLink': new_link}},
                upsert=True,
            )
            print(fixed.get('_id'))
        except Exception as exc:
            # Best-effort batch: report the failure and keep going.
            print('update failed for {}: {}'.format(item.get('_id'), exc))
